1 TADPOLE Univariate

1.0.1 Loading the libraries

library("FRESA.CAD")
library(psych)
library(whitening)
library("vioplot")

library(readxl)
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)

1.1 The data set

TADPOLE_D1_D2 <- read.csv("~/GitHub/BSWiMS/Data/TADPOLE/TADPOLE_D1_D2.csv")
TADPOLE_D1_D2_Dict <- read.csv("~/GitHub/BSWiMS/Data/TADPOLE/TADPOLE_D1_D2_Dict.csv")
TADPOLE_D1_D2_Dict_LR <- as.data.frame(read_excel("~/GitHub/BSWiMS/Data/TADPOLE/TADPOLE_D1_D2_Dict_LR.xlsx",sheet = "LeftRightFeatures"))


rownames(TADPOLE_D1_D2_Dict) <- TADPOLE_D1_D2_Dict$FLDNAME

1.2 Conditioning the data


# mm3 to mm
isVolume <- c("Ventricles","Hippocampus","WholeBrain","Entorhinal","Fusiform","MidTemp","ICV",
              TADPOLE_D1_D2_Dict$FLDNAME[str_detect(TADPOLE_D1_D2_Dict$TEXT,"Volume")]
              )


#TADPOLE_D1_D2[,isVolume] <- apply(TADPOLE_D1_D2[,isVolume],2,'^',(1/3))
TADPOLE_D1_D2[,isVolume] <- TADPOLE_D1_D2[,isVolume]^(1/3)

# mm2 to mm
isArea <- TADPOLE_D1_D2_Dict$FLDNAME[str_detect(TADPOLE_D1_D2_Dict$TEXT,"Area")]
TADPOLE_D1_D2[,isArea] <- sqrt(TADPOLE_D1_D2[,isArea])

# Get only cross sectional measurements
FreeSurfersetCross <- str_detect(colnames(TADPOLE_D1_D2),"UCSFFSX")

# The subset of baseline measurements
baselineTadpole <- subset(TADPOLE_D1_D2,VISCODE=="bl")
table(baselineTadpole$DX)
                   Dementia Dementia to MCI             MCI MCI to Dementia 
          7             336               1             864               5 
  MCI to NL              NL       NL to MCI 
          2             521               1 
table(baselineTadpole$DX_bl)

AD CN EMCI LMCI SMC 342 417 310 562 106


rownames(baselineTadpole) <- baselineTadpole$PTID


validBaselineTadpole <- cbind(DX=baselineTadpole$DX_bl,
                                 AGE=baselineTadpole$AGE,
                                 Gender=1*(baselineTadpole$PTGENDER=="Female"),
                                 ADAS11=baselineTadpole$ADAS11,
                                 ADAS13=baselineTadpole$ADAS13,
                                 MMSE=baselineTadpole$MMSE,
                                 RAVLT_immediate=baselineTadpole$RAVLT_immediate,
                                 RAVLT_learning=baselineTadpole$RAVLT_learning,
                                 RAVLT_forgetting=baselineTadpole$RAVLT_forgetting,
                                 RAVLT_perc_forgetting=baselineTadpole$RAVLT_perc_forgetting,
                                 FAQ=baselineTadpole$FAQ,
                                 Ventricles=baselineTadpole$Ventricles,
                                 Hippocampus=baselineTadpole$Hippocampus,
                                 WholeBrain=baselineTadpole$WholeBrain,
                                 Entorhinal=baselineTadpole$Entorhinal,
                                 Fusiform=baselineTadpole$Fusiform,
                                 MidTemp=baselineTadpole$MidTemp,
                                 ICV=baselineTadpole$ICV,
                                 baselineTadpole[,FreeSurfersetCross])


LeftFields <- TADPOLE_D1_D2_Dict_LR$LFN
names(LeftFields) <- LeftFields
LeftFields <- LeftFields[LeftFields %in% colnames(validBaselineTadpole)]
RightFields <- TADPOLE_D1_D2_Dict_LR$RFN
names(RightFields) <- RightFields
RightFields <- RightFields[RightFields %in% colnames(validBaselineTadpole)]

## Normalize to ICV
validBaselineTadpole$Ventricles=validBaselineTadpole$Ventricles/validBaselineTadpole$ICV
validBaselineTadpole$Hippocampus=validBaselineTadpole$Hippocampus/validBaselineTadpole$ICV
validBaselineTadpole$WholeBrain=validBaselineTadpole$WholeBrain/validBaselineTadpole$ICV
validBaselineTadpole$Entorhinal=validBaselineTadpole$Entorhinal/validBaselineTadpole$ICV
validBaselineTadpole$Fusiform=validBaselineTadpole$Fusiform/validBaselineTadpole$ICV
validBaselineTadpole$MidTemp=validBaselineTadpole$MidTemp/validBaselineTadpole$ICV

leftData <- validBaselineTadpole[,LeftFields]/validBaselineTadpole$ICV
RightData <- validBaselineTadpole[,RightFields]/validBaselineTadpole$ICV

## get mean and relative difference 
meanLeftRight <- (leftData + RightData)/2
difLeftRight <- abs(leftData - RightData)
reldifLeftRight <- difLeftRight/meanLeftRight
colnames(meanLeftRight) <- paste("M",colnames(meanLeftRight),sep="_")
colnames(difLeftRight) <- paste("D",colnames(difLeftRight),sep="_")
colnames(reldifLeftRight) <- paste("RD",colnames(reldifLeftRight),sep="_")


validBaselineTadpole <- validBaselineTadpole[,!(colnames(validBaselineTadpole) %in% 
                                               c(LeftFields,RightFields))]
#validBaselineTadpole <- cbind(validBaselineTadpole,meanLeftRight,difLeftRight,reldifLeftRight)
validBaselineTadpole <- cbind(validBaselineTadpole,meanLeftRight,difLeftRight)

## Remove columns with too many NA more than %15 of NA
nacount <- apply(is.na(validBaselineTadpole),2,sum)/nrow(validBaselineTadpole) < 0.15
diagnose <- validBaselineTadpole$DX
pander::pander(table(diagnose))
AD CN EMCI LMCI SMC
342 417 310 562 106
validBaselineTadpole <- validBaselineTadpole[,nacount]
## Remove character columns
ischar <- sapply(validBaselineTadpole,class) == "character"
validBaselineTadpole <- validBaselineTadpole[,!ischar]
## Place back diagnose
validBaselineTadpole$DX <- diagnose


validBaselineTadpole <- validBaselineTadpole[complete.cases(validBaselineTadpole),]
ischar <- sapply(validBaselineTadpole,class) == "character"
validBaselineTadpole[,!ischar] <- sapply(validBaselineTadpole[,!ischar],as.numeric)

colnames(validBaselineTadpole) <- str_remove_all(colnames(validBaselineTadpole),"_UCSFFSX_11_02_15_UCSFFSX51_08_01_16")
colnames(validBaselineTadpole) <- str_replace_all(colnames(validBaselineTadpole)," ","_")
validBaselineTadpole$LONISID <- NULL
validBaselineTadpole$IMAGEUID <- NULL
validBaselineTadpole$LONIUID <- NULL

diagnose <- as.character(validBaselineTadpole$DX)
validBaselineTadpole$DX <- diagnose
pander::pander(table(validBaselineTadpole$DX))
AD CN EMCI LMCI SMC
245 359 272 444 93


validBaselineTadpole[validBaselineTadpole$DX %in% c("EMCI","LMCI"),"DX"] <- "MCI" 
validBaselineTadpole[validBaselineTadpole$DX %in% c("CN","SMC"),"DX"] <- "NL" 

pander::pander(table(validBaselineTadpole$DX))
AD MCI NL
245 716 452

1.3 Get the Time To Event on MCI Subjects


subjectsID <- rownames(validBaselineTadpole)
visitsID <- unique(TADPOLE_D1_D2$VISCODE)
baseDx <- TADPOLE_D1_D2[TADPOLE_D1_D2$VISCODE=="bl",c("PTID","DX","EXAMDATE")]
rownames(baseDx) <- baseDx$PTID 
baseDx <- baseDx[subjectsID,]
lastDx <- baseDx
toDementia <- baseDx
table(lastDx$DX)
   Dementia Dementia to MCI             MCI MCI to Dementia       MCI to NL 
        244               1             711               2               2 
         NL       NL to MCI 
        452               1 
hasDementia <- lastDx$PTID[str_detect(lastDx$DX,"Dementia")]


for (vid in visitsID)
{
  DxValue <- TADPOLE_D1_D2[TADPOLE_D1_D2$VISCODE==vid,c("PTID","DX","EXAMDATE")]
  rownames(DxValue) <- DxValue$PTID 
  DxValue <- DxValue[DxValue$PTID %in% subjectsID,]
  noDX <- DxValue$PTID[nchar(DxValue$DX) < 1]
  print(length(noDX))
  DxValue[noDX,] <- lastDx[noDX,]
  inLast <- lastDx$PTID[lastDx$PTID %in% DxValue$PTID]
  print(length(inLast))
  lastDx[inLast,] <- DxValue[inLast,]
  noDementia <- !(toDementia$PTID %in% hasDementia)
  toDementia[noDementia,] <- lastDx[noDementia,]
  hasDementia <- unique(c(hasDementia,lastDx$PTID[str_detect(lastDx$DX,"Dementia")]))
}

[1] 0 [1] 1413 [1] 2 [1] 1326 [1] 6 [1] 1218 [1] 23 [1] 1095 [1] 805 [1] 1058 [1] 29 [1] 710 [1] 20 [1] 212 [1] 14 [1] 167 [1] 32 [1] 553 [1] 25 [1] 298 [1] 18 [1] 130 [1] 667 [1] 667 [1] 112 [1] 112 [1] 176 [1] 176 [1] 177 [1] 177 [1] 625 [1] 625 [1] 251 [1] 251 [1] 159 [1] 159 [1] 7 [1] 7 [1] 17 [1] 99 [1] 9 [1] 63 [1] 1 [1] 1

table(lastDx$DX)
   Dementia Dementia to MCI             MCI MCI to Dementia       MCI to NL 
        428               2             463              80               7 
         NL  NL to Dementia       NL to MCI 
        406               1              26 
baseMCI <-baseDx$PTID[baseDx$DX == "MCI"]
lastDementia <- lastDx$PTID[str_detect(lastDx$DX,"Dementia")]
lastDementia2 <- toDementia$PTID[str_detect(toDementia$DX,"Dementia")]
lastNL <- lastDx$PTID[str_detect(lastDx$DX,"NL")]

MCIatBaseline <- baseDx[baseMCI,]
MCIatEvent <- toDementia[baseMCI,]
MCIatLast <- lastDx[baseMCI,]

MCIconverters <- MCIatBaseline[baseMCI %in% lastDementia,]
MCI_No_converters <- MCIatBaseline[!(baseMCI %in% MCIconverters$PTID),]
MCIconverters$TimeToEvent <- (as.Date(toDementia[MCIconverters$PTID,"EXAMDATE"]) 
                                   - as.Date(MCIconverters$EXAMDATE))

sum(MCIconverters$TimeToEvent ==0)

[1] 0



MCIconverters$AtEventDX <- MCIatEvent[MCIconverters$PTID,"DX"]
MCIconverters$LastDX <- MCIatLast[MCIconverters$PTID,"DX"]

MCI_No_converters$TimeToEvent <- (as.Date(lastDx[MCI_No_converters$PTID,"EXAMDATE"]) 
                                   - as.Date(MCI_No_converters$EXAMDATE))

MCI_No_converters$LastDX <- MCIatLast[MCI_No_converters$PTID,"DX"]

MCI_No_converters <- subset(MCI_No_converters,TimeToEvent > 0)

2 Prognosis MCI to AD Conversion

2.1 the set


MCIPrognosisIDs <- c(MCIconverters$PTID,MCI_No_converters$PTID)

TADPOLECrossMRI <- validBaselineTadpole[MCIPrognosisIDs,]
table(TADPOLECrossMRI$DX)

MCI 680

TADPOLECrossMRI$DX <- NULL
TADPOLECrossMRI$status <- 1*(rownames(TADPOLECrossMRI) %in% MCIconverters$PTID)
table(TADPOLECrossMRI$status)

0 1 436 244

2.1.0.1 Standarize the names for the reporting

dataframe <- TADPOLECrossMRI
outcome <- "status"

trainFraction <- 0.5
rhoThreshold <- 0.6
TopVariables <- 5
aucTHR <- 0.55

set.seed(5)
trainSample <- sample(nrow(dataframe),nrow(dataframe)*trainFraction)

trainDataFrame <- dataframe[trainSample,]
testDataFrame <- dataframe[-trainSample,]

2.1.1 Data specs

pander::pander(c(rows=nrow(dataframe),col=ncol(dataframe)-1))
rows col
680 327
pander::pander(table(dataframe[,outcome]))
0 1
436 244
pander::pander(table(trainDataFrame[,outcome]))
0 1
208 132
pander::pander(table(testDataFrame[,outcome]))
0 1
228 112

varlist <- colnames(dataframe)
varlist <- varlist[varlist != outcome]

2.2 Univariate


univariate_columns <- c("caseMean","caseStd","controlMean","controlStd","controlKSP","IDI","ROCAUC","cStatCorr")
univar <- uniRankVar(varlist,
               paste(outcome,"~1"),
               outcome,
               trainDataFrame,
              rankingTest = "CStat")

100 : M_ST24SA 200 : D_ST49TA 300 : D_ST47CV



#univar$orderframe[1:5,univariate_columns]
univarTest <- uniRankVar(varlist,
               paste(outcome,"~1"),
               outcome,
               trainDataFrame,
               testData=testDataFrame,
              rankingTest = "CStat")

100 : M_ST24SA 200 : D_ST49TA 300 : D_ST47CV


univar$orderframe$BACC <- (univar$orderframe$Sensitivity + univar$orderframe$Specificity)/2.0
univarTest$orderframe$BACC <- (univarTest$orderframe$Sensitivity + univarTest$orderframe$Specificity)/2.0

#pROC::roc(trainDataFrame$class,trainDataFrame[,univar$orderframe$Name[1]],direction=">",plot=TRUE,auc=TRUE,quiet = TRUE)

2.3 Decorrelation with UPSTM Blind

DEdataframe <- IDeA(trainDataFrame,thr=rhoThreshold)
predTestDe <- predictDecorrelate(DEdataframe,testDataFrame)

ltvar <- getLatentCoefficients(DEdataframe);
pander::pander(head(ltvar))
  • La_ADAS13:

    ADAS11 ADAS13
    -1.39 1
  • La_RAVLT_immediate:

    ADAS11 RAVLT_immediate
    1.38 1
  • La_RAVLT_perc_forgetting:

    RAVLT_immediate RAVLT_forgetting RAVLT_perc_forgetting
    1.58 -10.1 1
  • La_WholeBrain:

    WholeBrain M_ST36CV
    1 -2.7
  • La_ST10CV:

    ICV ST10CV
    -0.998 1
  • La_ST127SV:

    Ventricles ST127SV
    -23.2 1
pander::pander(c(Avlen=mean(sapply(ltvar,length))))
Avlen
2.41
pander::pander(c(Latent=length(ltvar)))
Latent
119


varlistDe <-  colnames(DEdataframe)[colnames(DEdataframe) != outcome];
univarDe <- uniRankVar(varlistDe,
              paste(outcome,"~1"),
                outcome,
              DEdataframe,
              rankingTest = "CStat")

100 : M_ST24SA 200 : D_ST49TA 300 : La_D_ST47CV


univarDeTest <- uniRankVar(varlistDe,
              paste(outcome,"~1"),
                outcome,
              DEdataframe,
              testData=predTestDe,
              rankingTest = "CStat")

100 : M_ST24SA 200 : D_ST49TA 300 : La_D_ST47CV


univarDe$orderframe$BACC <- (univarDe$orderframe$Sensitivity + univarDe$orderframe$Specificity)/2.0
univarDeTest$orderframe$BACC <- (univarDeTest$orderframe$Sensitivity + univarDeTest$orderframe$Specificity)/2.0

#univarDe$orderframe[1:5,univariate_columns]
#univarDeTest$orderframe[1:5,univariate_columns]

#pROC::roc(DEdataframe$class,DEdataframe[,univarDe$orderframe$Name[1]],direction=">",plot=TRUE,auc=TRUE,quiet = TRUE)

2.4 Decorrelation with UPSTM Blind/Spearman

DEdataframeSpear <- IDeA(trainDataFrame,thr=rhoThreshold,method="spearman")
predTestDeSpear <- predictDecorrelate(DEdataframeSpear,testDataFrame)

ltvar <- getLatentCoefficients(DEdataframeSpear);
pander::pander(head(ltvar))
  • La_ADAS13:

    ADAS11 ADAS13
    -1.39 1
  • La_RAVLT_immediate:

    ADAS11 RAVLT_immediate
    1.38 1
  • La_RAVLT_perc_forgetting:

    RAVLT_immediate RAVLT_forgetting RAVLT_perc_forgetting
    1.58 -10.1 1
  • La_Hippocampus:

    Hippocampus M_ST24TA
    1 -1.62
  • La_WholeBrain:

    WholeBrain M_ST31TA
    1 -7.57
  • La_ST10CV:

    ICV ST10CV
    -0.998 1
pander::pander(c(Avlen=mean(sapply(ltvar,length))))
Avlen
2.35
pander::pander(c(Latent=length(ltvar)))
Latent
111


varlistDeSpear <-  colnames(DEdataframeSpear)[colnames(DEdataframeSpear) != outcome];
univarDeSpear <- uniRankVar(varlistDeSpear,
              paste(outcome,"~1"),
                outcome,
              DEdataframeSpear,
              rankingTest = "CStat")

100 : M_ST24SA 200 : D_ST49TA 300 : La_D_ST47CV


univarDeSpearTest <- uniRankVar(varlistDeSpear,
              paste(outcome,"~1"),
                outcome,
              DEdataframeSpear,
              testData=predTestDeSpear,
              rankingTest = "CStat")

100 : M_ST24SA 200 : D_ST49TA 300 : La_D_ST47CV


univarDeSpear$orderframe$BACC <- (univarDeSpear$orderframe$Sensitivity + univarDeSpear$orderframe$Specificity)/2.0
univarDeSpearTest$orderframe$BACC <- (univarDeSpearTest$orderframe$Sensitivity + univarDeSpearTest$orderframe$Specificity)/2.0

2.5 Decorrelation with UPSTM Driven


DriDEdataframe <- IDeA(trainDataFrame,Outcome=outcome,thr=rhoThreshold)
predTestDri <- predictDecorrelate(DriDEdataframe,testDataFrame)


ltvar <- getLatentCoefficients(DriDEdataframe);
pander::pander(head(ltvar))
  • La_ADAS11:

    ADAS11 ADAS13
    1 -0.649
  • La_RAVLT_immediate:

    ADAS13 RAVLT_immediate
    1.02 1
  • La_RAVLT_forgetting:

    RAVLT_learning RAVLT_forgetting RAVLT_perc_forgetting
    -0.373 1 -0.0585
  • La_Ventricles:

    Ventricles ST7SV
    1 -0.0322
  • La_Hippocampus:

    Hippocampus M_ST29SV
    1 -1.23
  • La_WholeBrain:

    WholeBrain M_ST36CV
    1 -2.7
pander::pander(c(Avlen=mean(sapply(ltvar,length))))
Avlen
2.4
pander::pander(c(Latent=length(ltvar)))
Latent
117


varlistDe <-  colnames(DriDEdataframe)[colnames(DriDEdataframe) != outcome];
univarDeDri <- uniRankVar(varlistDe,
              paste(outcome,"~1"),
                outcome,
              DriDEdataframe,
              rankingTest = "CStat")

100 : M_ST24SA 200 : D_ST49TA 300 : La_D_ST47CV


univarDeDriTest <- uniRankVar(varlistDe,
              paste(outcome,"~1"),
                outcome,
              DriDEdataframe,
              testData=predTestDri,
              rankingTest = "CStat")

100 : M_ST24SA 200 : D_ST49TA 300 : La_D_ST47CV


univarDeDri$orderframe$BACC <- (univarDeDri$orderframe$Sensitivity + univarDeDri$orderframe$Specificity)/2.0
univarDeDriTest$orderframe$BACC <- (univarDeDriTest$orderframe$Sensitivity + univarDeDriTest$orderframe$Specificity)/2.0

2.6 Decorrelation with UPSTM Driven and Spearman


DriDEdataframeSpear <- IDeA(trainDataFrame,Outcome=outcome,thr=rhoThreshold,method="spearman")
predTestDriSpear <- predictDecorrelate(DriDEdataframeSpear,testDataFrame)


ltvar <- getLatentCoefficients(DriDEdataframeSpear);
pander::pander(head(ltvar))
  • La_ADAS11:

    ADAS11 ADAS13
    1 -0.649
  • La_RAVLT_immediate:

    ADAS13 RAVLT_immediate
    1.02 1
  • La_RAVLT_forgetting:

    RAVLT_learning RAVLT_forgetting RAVLT_perc_forgetting
    -0.373 1 -0.0585
  • La_Ventricles:

    Ventricles ST7SV
    1 -0.0322
  • La_Hippocampus:

    Hippocampus M_ST29SV
    1 -1.23
  • La_WholeBrain:

    WholeBrain M_ST29SV
    1 -1.94
pander::pander(c(Avlen=mean(sapply(ltvar,length))))
Avlen
2.28
pander::pander(c(Latent=length(ltvar)))
Latent
105


varlistDeSpear <-  colnames(DriDEdataframeSpear)[colnames(DriDEdataframeSpear) != outcome];
univarDeDriSpear <- uniRankVar(varlistDeSpear,
              paste(outcome,"~1"),
                outcome,
              DriDEdataframeSpear,
              rankingTest = "CStat")

100 : M_ST24SA 200 : D_ST49TA 300 : La_D_ST47CV


univarDeDriSpearTest <- uniRankVar(varlistDeSpear,
              paste(outcome,"~1"),
                outcome,
              DriDEdataframeSpear,
              testData=predTestDriSpear,
              rankingTest = "CStat")

100 : M_ST24SA 200 : D_ST49TA 300 : La_D_ST47CV


univarDeDriSpear$orderframe$BACC <- (univarDeDriSpear$orderframe$Sensitivity + univarDeDriSpear$orderframe$Specificity)/2.0
univarDeDriSpearTest$orderframe$BACC <- (univarDeDriSpearTest$orderframe$Sensitivity + univarDeDriSpearTest$orderframe$Specificity)/2.0

2.6.1 Get continous correlated features

iscontinous <- sapply(apply(trainDataFrame,2,unique),length) > 5 ## Only variables with enough samples

noclassData <- trainDataFrame[,iscontinous]
cmat <- cor(noclassData);
diag(cmat) <- 0;
maxcor <- apply(cmat>rhoThreshold,2,sum);
topcor <- names(maxcor[maxcor > 0]) ## Only correlated features will be PCA
pander::pander(c(Ncor=length(topcor)))
Ncor
192
cmat <- NULL

notcorr <- colnames(trainDataFrame)[!(colnames(trainDataFrame) %in% topcor)]
noclassData <- noclassData[,topcor]
noclassTestData <- testDataFrame[,topcor]

2.7 PCA Analysis


### PCA 

pc <- principal(noclassData,4*TopVariables,rotate="varimax")   #principal components
pander::pander(t(pc$loadings[1:TopVariables,1:TopVariables]))
  ADAS11 ADAS13 RAVLT_immediate RAVLT_learning RAVLT_forgetting
RC1 -0.2472 -0.27699 0.3309 0.2521 0.0109
RC2 0.0964 0.14173 -0.2176 -0.2052 -0.0339
RC4 -0.0679 -0.07057 0.0798 -0.0374 -0.0427
RC6 -0.0259 -0.00945 0.1057 -0.0340 -0.0777
RC5 -0.7110 -0.75567 0.5859 0.5115 -0.3904
PCA_Train <- as.data.frame(cbind(predict(pc,noclassData),trainDataFrame[,notcorr]))
colnames(PCA_Train) <- c(colnames(predict(pc,noclassData)),notcorr)

PCA_Predicted <- as.data.frame(cbind(predict(pc,noclassTestData),testDataFrame[,notcorr]))
colnames(PCA_Predicted) <- c(colnames(predict(pc,noclassTestData)),notcorr)

iscontinous <- colnames(PCA_Predicted)[sapply(apply(PCA_Predicted,2,unique),length) > 5] ## Only variables with enough samples
varlistPCA <-  iscontinous;

univarPCA <- uniRankVar(varlistPCA,
              paste(outcome,"~1"),
                outcome,
              PCA_Train,
              rankingTest = "CStat")

100 : D_ST31TS


univarPCATest <- uniRankVar(varlistPCA,
              paste(outcome,"~1"),
                outcome,
              PCA_Train,
              testData=PCA_Predicted,
              rankingTest = "CStat")

100 : D_ST31TS


univarPCA$orderframe$BACC <- (univarPCA$orderframe$Sensitivity + univarPCA$orderframe$Specificity)/2.0
univarPCATest$orderframe$BACC <- (univarPCATest$orderframe$Sensitivity + univarPCATest$orderframe$Specificity)/2.0

2.8 EFA


uls <- fa(noclassData,4*TopVariables,rotate="varimax")  #unweighted least squares is minres 
pander::pander(t(uls$weights[1:TopVariables,1:TopVariables])) 
  ADAS11 ADAS13 RAVLT_immediate RAVLT_learning RAVLT_forgetting
MR1 0.0583 -0.0786 0.000637 0.00973 -0.044826
MR2 -0.0596 0.0695 -0.035720 0.01209 0.000487
MR4 0.0725 -0.0852 -0.153868 0.05421 -0.039901
MR3 -0.0643 0.2033 0.022566 -0.02166 0.089076
MR5 -0.1106 0.1013 -0.048827 0.03481 -0.032652
EFA_Train <- as.data.frame(cbind(predict(uls,noclassData),trainDataFrame[,notcorr]))
colnames(EFA_Train) <- c(colnames(predict(uls,noclassData)),notcorr)
EFA_Predicted <- as.data.frame(cbind(predict(uls,noclassTestData),testDataFrame[,notcorr]))
colnames(EFA_Predicted) <- c(colnames(predict(uls,noclassTestData)),notcorr)

iscontinous <- colnames(EFA_Predicted)[sapply(apply(EFA_Predicted,2,unique),length) > 5] ## Only variables with enough 
varlistEFA <-  iscontinous
univarEFA <- uniRankVar(varlistEFA,
              paste(outcome,"~1"),
                outcome,
              EFA_Train,
              rankingTest = "CStat")

100 : D_ST31TS


univarEFATest <- uniRankVar(varlistEFA,
              paste(outcome,"~1"),
                outcome,
              EFA_Train,
              testData=EFA_Predicted,
              rankingTest = "CStat")

100 : D_ST31TS


univarEFA$orderframe$BACC <- (univarEFA$orderframe$Sensitivity + univarEFA$orderframe$Specificity)/2.0
univarEFATest$orderframe$BACC <- (univarEFATest$orderframe$Sensitivity + univarEFATest$orderframe$Specificity)/2.0

2.9 White

WhiteMat = whiteningMatrix(cov(noclassData), method="PCA")
sum(is.na(WhiteMat))

[1] 0

tokeep <- apply(is.na(WhiteMat),1,sum) == 0
WhiteMat <- WhiteMat[tokeep,]
sum(is.na(WhiteMat))

[1] 0

sum(apply(abs(WhiteMat),1,sum) > 1.0e6)

[1] 0

tokeep <- apply(abs(WhiteMat),1,sum) < 1.0e6
WhiteMat <- WhiteMat[tokeep,]
sum(apply(abs(WhiteMat),1,sum) > 1.0e6)

[1] 0


pander::pander(c(ncol=ncol(WhiteMat),nrow=nrow(WhiteMat)))
ncol nrow
192 192

pander::pander(WhiteMat[1:TopVariables,1:TopVariables]) 
  ADAS11 ADAS13 RAVLT_immediate RAVLT_learning RAVLT_forgetting
L1 0.00197 0.00336 -0.00552 -0.00107 0.001595
L2 0.03130 0.04447 -0.08537 -0.01298 -0.013590
L3 0.01492 0.02388 0.00350 -0.00265 0.000399
L4 0.09347 0.12706 0.10296 0.00277 0.005494
L5 -0.01022 -0.00902 0.01445 0.01523 0.004159
PCAWhite_Train <- as.data.frame(cbind(tcrossprod(as.matrix(noclassData), WhiteMat),trainDataFrame[,notcorr]))
colnames(PCAWhite_Train) <- c(colnames(tcrossprod(as.matrix(noclassData), WhiteMat)),notcorr)

sum(is.na(PCAWhite_Train))

[1] 0




PCAWhitePredicted <- as.data.frame(cbind(tcrossprod(as.matrix(noclassTestData), WhiteMat),testDataFrame[,notcorr]))
colnames(PCAWhitePredicted) <- c(colnames(tcrossprod(as.matrix(noclassTestData), WhiteMat)),notcorr)

sum(is.na(PCAWhitePredicted))

[1] 0


iscontinous <- colnames(PCAWhitePredicted)[sapply(apply(PCAWhitePredicted,2,unique),length) > 5] ## Only variables with enough 
varlistWhite <-  iscontinous

univarWhite <- uniRankVar(varlistWhite,
              paste(outcome,"~1"),
                outcome,
              PCAWhite_Train,
              rankingTest = "CStat")

100 : L100 200 : M_ST23TS 300 : D_ST129SA



univarWhiteTest <- uniRankVar(varlistWhite,
              paste(outcome,"~1"),
                outcome,
              PCAWhite_Train,
              testData=PCAWhitePredicted,
              rankingTest = "CStat")

100 : L100 200 : M_ST23TS 300 : D_ST129SA


univarWhite$orderframe$BACC <- (univarWhite$orderframe$Sensitivity + univarWhite$orderframe$Specificity)/2.0
univarWhiteTest$orderframe$BACC <- (univarWhiteTest$orderframe$Sensitivity + univarWhiteTest$orderframe$Specificity)/2.0

2.10 Correlation Matrices

2.10.1 RAW

par(cex=1.0,cex.main=0.8)
breaks <- c(0:5)/5.0;

cormat <- cor(testDataFrame,method="spearman")
cormat[is.na(cormat)] <- 0
diag(cormat) <- 0;
pander::pander(max(abs(cormat)))

1

pander::pander(quantile(abs(cormat),probs=c(0.9,0.95,0.975,0.99,0.999)))
90% 95% 97.5% 99% 99.9%
0.294 0.433 0.535 0.627 0.792
pander::pander(c(Raw_fraction=sum(abs(cormat)>rhoThreshold)/ncol(cormat)/ncol(cormat)))
Raw_fraction
0.0136

gplots::heatmap.2(abs(cormat),
                  trace = "none",
                  mar = c(5,5),
                  col=rev(heat.colors(5)),
                  breaks = breaks,
                  main = "Raw Correlation",
                  cexRow = 0.25,
                  cexCol = 0.25,
                  srtCol=35,
                  srtRow=75,
                  key.title=NA,
                  key.xlab="Spearman Correlation",
                  xlab="Feature", ylab="Feature"
                  )


#hist(cormat,freq=FALSE,
#     density=NULL,
#     xlim=c(-1,1),
#     ylim=c(0,4.0),
#     main="Raw Correlation",xlab="Spearman Correlation")

rawDen <- density(cormat,from=-1,to=1)
par(op)

2.10.2 UPSTM Blind

par(cex=1.0,cex.main=0.8)

## Train Correlation

cormat <- cor(DEdataframe,method="pearson")
cormat[is.na(cormat)] <- 0
diag(cormat) <- 0;

#pander::pander(colnames(cormat)[apply(abs(cormat),2,max)>rhoThreshold])

pander::pander(c(Train=max(abs(cormat))))
Train
0.599
pander::pander(quantile(abs(cormat),probs=c(0.9,0.95,0.975,0.99,0.999)))
90% 95% 97.5% 99% 99.9%
0.144 0.2 0.265 0.353 0.536
pander::pander(c(HCA_fraction=sum(abs(cormat)>rhoThreshold)/ncol(cormat)/ncol(cormat)))
HCA_fraction
0

## Test Correlation
cormat <- cor(predTestDe,method="spearman")
cormat[is.na(cormat)] <- 0
diag(cormat) <- 0;

pander::pander(c(Test=max(abs(cormat))))
Test
0.853
pander::pander(quantile(abs(cormat),probs=c(0.9,0.95,0.975,0.99,0.999)))
90% 95% 97.5% 99% 99.9%
0.145 0.202 0.269 0.359 0.538
pander::pander(c(HCA_fraction=sum(abs(cormat)>rhoThreshold)/ncol(cormat)/ncol(cormat)))
HCA_fraction
0.00026

gplots::heatmap.2(abs(cormat),
                  trace = "none",
                  mar = c(5,5),
                  col=rev(heat.colors(5)),
                  breaks = breaks,
                  main = "Correlation after HCA",
                  cexRow = 0.25,
                  cexCol = 0.25,
                  srtCol=35,
                  srtRow=75,
                  key.title=NA,
                  key.xlab="Spearman Correlation",
                  xlab="Feature", ylab="Feature")


#hist(cormat,freq=FALSE,
#     density=NULL,
#     xlim=c(-1,1),
#     ylim=c(0,4.0),
#     main="Correlation after UPSTM",xlab="Spearman Correlation")

DeDen <- density(cormat,from=-1,to=1)


par(op)

2.10.3 UPSTM Blind/Spearman

par(cex=1.0,cex.main=0.8)

## Train Correlation

cormat <- cor(DEdataframeSpear,method="pearson")
cormat[is.na(cormat)] <- 0
diag(cormat) <- 0;

#pander::pander(colnames(cormat)[apply(abs(cormat),2,max)>rhoThreshold])

pander::pander(c(Train=max(abs(cormat))))
Train
0.837
pander::pander(quantile(abs(cormat),probs=c(0.9,0.95,0.975,0.99,0.999)))
90% 95% 97.5% 99% 99.9%
0.143 0.195 0.257 0.347 0.542
pander::pander(c(HCA_fraction=sum(abs(cormat)>rhoThreshold)/ncol(cormat)/ncol(cormat)))
HCA_fraction
0.000335

## Test Correlation
cormat <- cor(predTestDeSpear,method="spearman")
cormat[is.na(cormat)] <- 0
diag(cormat) <- 0;

pander::pander(c(Test=max(abs(cormat))))
Test
0.878
pander::pander(quantile(abs(cormat),probs=c(0.9,0.95,0.975,0.99,0.999)))
90% 95% 97.5% 99% 99.9%
0.142 0.197 0.264 0.353 0.54
pander::pander(c(HCA_fraction=sum(abs(cormat)>rhoThreshold)/ncol(cormat)/ncol(cormat)))
HCA_fraction
0.000335

gplots::heatmap.2(abs(cormat),
                  trace = "none",
                  mar = c(5,5),
                  col=rev(heat.colors(5)),
                  breaks = breaks,
                  main = "Correlation after HCA:Spearman",
                  cexRow = 0.25,
                  cexCol = 0.25,
                  srtCol=35,
                  srtRow=75,
                  key.title=NA,
                  key.xlab="Spearman Correlation",
                  xlab="Feature", ylab="Feature")


#hist(cormat,freq=FALSE,
#     density=NULL,
#     xlim=c(-1,1),
#     ylim=c(0,4.0),
#     main="Correlation after UPSTM",xlab="Spearman Correlation")

DeSpearDen <- density(cormat,from=-1,to=1)

par(op)

2.10.4 UPSTM Driven

par(cex=1.0,cex.main=0.8)

## Train Correlation

cormat <- cor(DriDEdataframe,method="pearson")
cormat[is.na(cormat)] <- 0
diag(cormat) <- 0;
#pander::pander(colnames(cormat)[apply(abs(cormat),2,max)>rhoThreshold])

pander::pander(c(Train=max(abs(cormat))))
Train
0.599
pander::pander(quantile(abs(cormat),probs=c(0.9,0.95,0.975,0.99,0.999)))
90% 95% 97.5% 99% 99.9%
0.15 0.209 0.276 0.369 0.537
pander::pander(c(HCA_fraction=sum(abs(cormat)>rhoThreshold)/ncol(cormat)/ncol(cormat)))
HCA_fraction
0

## Test Correlation
cormat <- cor(DriDEdataframe,method="spearman")
cormat[is.na(cormat)] <- 0
diag(cormat) <- 0;

pander::pander(c(Test=max(abs(cormat))))
Test
0.769
pander::pander(quantile(abs(cormat),probs=c(0.9,0.95,0.975,0.99,0.999)))
90% 95% 97.5% 99% 99.9%
0.147 0.205 0.271 0.36 0.527
pander::pander(c(HCA_fraction=sum(abs(cormat)>rhoThreshold)/ncol(cormat)/ncol(cormat)))
HCA_fraction
7.44e-05

gplots::heatmap.2(abs(cormat),
                  trace = "none",
                  mar = c(5,5),
                  col=rev(heat.colors(5)),
                  breaks = breaks,
                  main = "Correlation after Driven-HCA",
                  cexRow = 0.25,
                  cexCol = 0.25,
                  srtCol=35,
                  srtRow=75,
                  key.title=NA,
                  key.xlab="Spearman Correlation",
                  xlab="Feature", ylab="Feature")

#hist(cormat,freq=FALSE,
#     density=NULL,
#     xlim=c(-1,1),
#     ylim=c(0,4.0),
#     main="Correlation after Driven-UPSTM",xlab="Spearman Correlation")

DeDrivDen <- density(cormat,from=-1,to=1)
par(op)

2.10.5 UPSTM Spearman

par(cex=1.0,cex.main=0.8)

## Train Correlation

cormat <- cor(DriDEdataframeSpear,method="pearson")
cormat[is.na(cormat)] <- 0
diag(cormat) <- 0;
#pander::pander(colnames(cormat)[apply(abs(cormat),2,max)>rhoThreshold])

pander::pander(c(Train=max(abs(cormat))))
Train
0.835
pander::pander(quantile(abs(cormat),probs=c(0.9,0.95,0.975,0.99,0.999)))
90% 95% 97.5% 99% 99.9%
0.155 0.216 0.287 0.379 0.553
pander::pander(c(HCAS_fraction=sum(abs(cormat)>rhoThreshold)/ncol(cormat)/ncol(cormat)))
HCAS_fraction
0.000316

## Test Correlation

cormat <- cor(predTestDriSpear,method="spearman")
cormat[is.na(cormat)] <- 0
diag(cormat) <- 0;

pander::pander(c(Test=max(abs(cormat))))
Test
0.84
pander::pander(quantile(abs(cormat),probs=c(0.9,0.95,0.975,0.99,0.999)))
90% 95% 97.5% 99% 99.9%
0.157 0.225 0.296 0.387 0.556
pander::pander(c(HCAS_fraction=sum(abs(cormat)>rhoThreshold)/ncol(cormat)/ncol(cormat)))
HCAS_fraction
0.000446

gplots::heatmap.2(abs(cormat),
                  trace = "none",
                  mar = c(5,5),
                  col=rev(heat.colors(5)),
                  breaks = breaks,
                  main = "Correlation: Driven/Spearman",
                  cexRow = 0.25,
                  cexCol = 0.25,
                  srtCol=35,
                  srtRow=75,
                  key.title=NA,
                  key.xlab="Spearman Correlation",
                  xlab="Feature", ylab="Feature")


#hist(cormat,freq=FALSE,
#     density=NULL,
#     xlim=c(-1,1),
#     ylim=c(0,4.0),
#     main="Correlation after UPSTM with Spearman",xlab="Spearman Correlation")

DeDrivSpearDen <- density(cormat,from=-1,to=1)
par(op)

2.10.6 PCA

par(cex=1.0,cex.main=0.8)



## Train Correlation

cormat <- cor(PCA_Train,method="pearson")
cormat[is.na(cormat)] <- 0
diag(cormat) <- 0;

pander::pander(c(Train=max(abs(cormat))))
Train
0.626
pander::pander(quantile(abs(cormat),probs=c(0.9,0.95,0.975,0.99,0.999)))
90% 95% 97.5% 99% 99.9%
0.119 0.155 0.204 0.317 0.531
pander::pander(c(PCA_fraction=sum(abs(cormat)>rhoThreshold)/ncol(cormat)/ncol(cormat)))
PCA_fraction
0.000164

## Test Correlation
cormat <- cor(PCA_Predicted,method="spearman")
cormat[is.na(cormat)] <- 0
diag(cormat) <- 0;

pander::pander(c(Test=max(abs(cormat))))
Test
0.597
pander::pander(quantile(abs(cormat),probs=c(0.9,0.95,0.975,0.99,0.999)))
90% 95% 97.5% 99% 99.9%
0.115 0.149 0.198 0.316 0.532
pander::pander(c(PCA_fraction=sum(abs(cormat)>rhoThreshold)/ncol(cormat)/ncol(cormat)))
PCA_fraction
0

gplots::heatmap.2(abs(cormat),
                  trace = "none",
                  mar = c(5,5),
                  col=rev(heat.colors(5)),
                  breaks = breaks,
                  main = "Correlation after PCA",
                  cexRow = 0.25,
                  cexCol = 0.25,
                  srtCol=35,
                  srtRow=75,
                  key.title=NA,
                  key.xlab="Spearman Correlation",
                  xlab="Feature", ylab="Feature")



#hist(cormat,freq=FALSE,
#     density=NULL,
#     xlim=c(-1,1),
#     ylim=c(0,4.0),
#     main="Correlation after PCA",xlab="Spearman Correlation")

PCADen <- density(cormat,from=-1,to=1)

par(op)

2.10.7 EFA

par(cex=1.0,cex.main=0.8)

## Train Correlation

cormat <- cor(EFA_Train,method="pearson")
cormat[is.na(cormat)] <- 0
diag(cormat) <- 0;

pander::pander(c(Train=max(abs(cormat))))
Train
0.626
pander::pander(quantile(abs(cormat),probs=c(0.9,0.95,0.975,0.99,0.999)))
90% 95% 97.5% 99% 99.9%
0.119 0.155 0.203 0.316 0.531
pander::pander(c(EFA_fraction=sum(abs(cormat)>rhoThreshold)/ncol(cormat)/ncol(cormat)))
EFA_fraction
8.22e-05

## Test Correlation
cormat <- cor(EFA_Predicted,method="spearman")
cormat[is.na(cormat)] <- 0
diag(cormat) <- 0;

pander::pander(c(Test=max(abs(cormat))))
Test
0.597
pander::pander(quantile(abs(cormat),probs=c(0.9,0.95,0.975,0.99,0.999)))
90% 95% 97.5% 99% 99.9%
0.117 0.153 0.205 0.314 0.526
pander::pander(c(EFA_fraction=sum(abs(cormat)>rhoThreshold)/ncol(cormat)/ncol(cormat)))
EFA_fraction
0

gplots::heatmap.2(abs(cormat),
                  trace = "none",
                  mar = c(5,5),
                  col=rev(heat.colors(5)),
                  breaks = breaks,
                  main = "Correlation after EFA",
                  cexRow = 0.25,
                  cexCol = 0.25,
                  srtCol=35,
                  srtRow=75,
                  key.title=NA,
                  key.xlab="Spearman Correlation",
                  xlab="Feature", ylab="Feature")



#hist(cormat,freq=FALSE,
#     density=NULL,
#     xlim=c(-1,1),
#     ylim=c(0,4.0),
#     main="Correlation after EFA",xlab="Spearman Correlation")

EFADen <- density(cormat,from=-1,to=1)
par(op)

2.10.8 PCA Whitening



## Train Correlation

cormat <- cor(PCAWhite_Train,method="pearson")
cormat[is.na(cormat)] <- 0
diag(cormat) <- 0;

pander::pander(c(Train=max(abs(cormat))))
Train
0.599
pander::pander(quantile(abs(cormat),probs=c(0.9,0.95,0.975,0.99,0.999)))
90% 95% 97.5% 99% 99.9%
0.0851 0.108 0.133 0.171 0.395
pander::pander(c(PCAWhite_fraction=sum(abs(cormat)>rhoThreshold)/ncol(cormat)/ncol(cormat)))
PCAWhite_fraction
0

## Test Correlation
cormat <- cor(PCAWhitePredicted,method="spearman")
cormat[is.na(cormat)] <- 0
diag(cormat) <- 0;

pander::pander(c(Test=max(abs(cormat))))
Test
0.582
pander::pander(quantile(abs(cormat),probs=c(0.9,0.95,0.975,0.99,0.999)))
90% 95% 97.5% 99% 99.9%
0.115 0.14 0.166 0.204 0.384
pander::pander(c(PCAWhite_fraction=sum(abs(cormat)>rhoThreshold)/ncol(cormat)/ncol(cormat)))
PCAWhite_fraction
0

gplots::heatmap.2(abs(cormat),
                  trace = "none",
                  mar = c(5,5),
                  col=rev(heat.colors(5)),
                  breaks = breaks,
                  main = "Correlation after PCAWhite",
                  cexRow = 0.25,
                  cexCol = 0.25,
                  srtCol=35,
                  srtRow=75,
                  key.title=NA,
                  key.xlab="Spearman Correlation",
                  xlab="Feature", ylab="Feature")



#hist(cormat,freq=FALSE,
#     density=NULL,
#     xlim=c(-1,1),
#     ylim=c(0,4.0),
#     main="Correlation after PCAWhite",xlab="Spearman Correlation")

WhiteDen <- density(cormat,from=-1,to=1)
par(op)

2.11 The Density Plot

par(cex=0.7)
colors=c("red","blue","green","darkblue","darkgreen","purple","orange","darkred");

plot(rawDen,
     xlim=c(-1,1),
     ylim=c(0.001,7.0),
     col=colors[1],
     lty=1,
     lwd=4,
     log="y",
     main="Test: Correlation Distribution",xlab="Spearman Correlation")

lines(DeDen,col=colors[2],lty=2,lwd=4)
lines(DeSpearDen,col=colors[3],lty=3,lwd=4)
lines(DeDrivDen,col=colors[4],lty=4,lwd=2)
lines(DeDrivSpearDen,col=colors[5],lty=5,lwd=2)

lines(PCADen,col=colors[6],lty=6,lwd=1)
lines(EFADen,col=colors[7],lty=7,lwd=1)
lines(WhiteDen,col=colors[8],lty=8,lwd=1)

names=c("Raw","HCA:P","HCA:S","DHCA:P","DHCA:S","PCA","EFA","White:PCA")
#colors=c("red","blue","green","blue","green","purple","purple","gray");
lines=c(1,2,3,4,5,6,7,8)
lwds=c(4,4,4,2,2,1,1,1)

legend("topleft",names,col=colors,lty=lines,lwd=lwds,cex=0.50)

par(op)

2.11.1 Differences between train and test ROC AUC

par(op)
par(mfrow=c(1,1),cex=0.7)

AUCResults <- list();
diffAUC <- list();
thenames <- rownames(univar$orderframe)[(rownames(univar$orderframe) %in% colnames(testDataFrame))]
rawAUC <- univar$orderframe[thenames,"ROCAUC"]
thenames <- thenames[rawAUC >= aucTHR]
rawAUC <- univar$orderframe[thenames,"ROCAUC"]
rawAUCTest <- univarTest$orderframe[thenames,"ROCAUC"]
AUCResults$RAW <- rawAUCTest
diffAUC$RAW <-  rawAUCTest-rawAUC
plot(rawAUC,rawAUCTest-rawAUC,
     xlab="TRAIN ROC AUC",
     ylab="Test:AUC-Train:AUC",
     xlim=c(0.5,1.0),
     ylim=c(-0.25,0.25),
     pch=1,
     col=colors[1],
     main="ROC AUC Difference Between Test and Train")

thenames <- rownames(univarDe$orderframe)[!(rownames(univarDe$orderframe) %in% colnames(testDataFrame))]
HCAP <- univarDe$orderframe[thenames,"ROCAUC"]
thenames <- thenames[HCAP >= aucTHR]
HCAP <- univarDe$orderframe[thenames,"ROCAUC"]
HCAPTest <- univarDeTest$orderframe[thenames,"ROCAUC"]
AUCResults$HCAP <- HCAP
AUCResults$HCAP_T <- HCAPTest
diffAUC$HCAP <-  HCAPTest-HCAP

points(HCAP,HCAPTest-HCAP,pch=2,col=colors[2])

thenames <- rownames(univarDeSpear$orderframe)[!(rownames(univarDeSpear$orderframe) %in% colnames(testDataFrame))]
HCAS <- univarDeSpear$orderframe[thenames,"ROCAUC"]
thenames <- thenames[HCAS >= aucTHR]
HCAS <- univarDeSpear$orderframe[thenames,"ROCAUC"]
HCASTest <- univarDeSpearTest$orderframe[thenames,"ROCAUC"]
AUCResults$HCAS <- HCAS
AUCResults$HCAS_T <- HCASTest
diffAUC$HCAS <-  HCASTest-HCAS

points(HCAS,HCASTest-HCAS,pch=3,col=colors[3])

thenames <- rownames(univarDeDri$orderframe)[!(rownames(univarDeDri$orderframe) %in% colnames(testDataFrame))]
DHCAP <- univarDeDri$orderframe[thenames,"ROCAUC"]
thenames <- thenames[DHCAP >= aucTHR]
DHCAP <- univarDeDri$orderframe[thenames,"ROCAUC"]
DHCAPTest <- univarDeDriTest$orderframe[thenames,"ROCAUC"]
AUCResults$DHCAP <- DHCAP
AUCResults$DHCAP_T <- DHCAPTest
diffAUC$DHCAP <-  DHCAPTest-DHCAP

points(DHCAP,DHCAPTest-DHCAP,pch=4,col=colors[4])

thenames <- rownames(univarDeDriSpear$orderframe)[!(rownames(univarDeDriSpear$orderframe) %in% colnames(testDataFrame))]
DHCAS <- univarDeDriSpear$orderframe[thenames,"ROCAUC"]
thenames <- thenames[DHCAS >= aucTHR]
DHCAS <- univarDeDriSpear$orderframe[thenames,"ROCAUC"]
DHCASTest <- univarDeDriSpearTest$orderframe[thenames,"ROCAUC"]
AUCResults$DHCAS <- DHCAS
AUCResults$DHCAS_T <- DHCASTest
diffAUC$DHCAS <-  DHCASTest-DHCAS

points(DHCAS,DHCASTest-DHCAS,pch=5,col=colors[5])

thenames <- rownames(univarPCA$orderframe)[!(rownames(univarPCA$orderframe) %in% colnames(testDataFrame))]
PCA <- univarPCA$orderframe[thenames,"ROCAUC"]
thenames <- thenames[PCA >= aucTHR]
PCA <- univarPCA$orderframe[thenames,"ROCAUC"]
PCATest <- univarPCATest$orderframe[thenames,"ROCAUC"]
AUCResults$PCA <- PCA
AUCResults$PCA_T <- PCATest
diffAUC$PCA <-  PCATest-PCA

points(PCA,PCATest-PCA,pch=6,col=colors[6])

thenames <- rownames(univarEFA$orderframe)[!(rownames(univarEFA$orderframe) %in% colnames(testDataFrame))]
EFA <- univarEFA$orderframe[thenames,"ROCAUC"]
thenames <- thenames[EFA >= aucTHR]
EFA <- univarEFA$orderframe[thenames,"ROCAUC"]

EFATest <- univarEFATest$orderframe[thenames,"ROCAUC"]
AUCResults$EFA <- EFA
AUCResults$EFA_T <- EFATest
diffAUC$EFA <-  EFATest-EFA

points(EFA,EFATest-EFA,pch=7,col=colors[7])

thenames <- rownames(univarWhite$orderframe)[!(rownames(univarWhite$orderframe) %in% colnames(testDataFrame))]
WPCA <- univarWhite$orderframe[thenames,"ROCAUC"]
thenames <- thenames[WPCA >= aucTHR]
WPCA <- univarWhite$orderframe[thenames,"ROCAUC"]
WPCATest <- univarWhiteTest$orderframe[thenames,"ROCAUC"]
AUCResults$WPCA <- WPCA
AUCResults$WPCA_T <- WPCATest
diffAUC$WPCA <-  WPCATest-WPCA


points(WPCA,WPCATest-WPCA,pch=8,col=colors[8])


names=c("Raw","HCA:P","HCA:S","DHCA:P","DHCA:S","PCA","EFA","White:PCA")
pchs=c(1,2,3,4,5,6,7,8)

legend("bottomright",names,col=colors,pch=pchs,cex=0.50)

par(op)

2.11.2 Violin of differences


par(op)
par(mfrow=c(1,1),cex=0.7)

vioplot(diffAUC,
        ylim=c(-0.25,0.25),
        ylab="Test-Train",
        main="Test-Train Paired ROC AUC Difference",
        col=colors,
        cex.axis=0.6,
        las=2
)
stripchart(diffAUC, method = "jitter", col = "gray",
           vertical = TRUE, pch = 1, add = TRUE,cex=0.35)
points(1:length(diffAUC),lapply(diffAUC,mean), pch = 18, col = "yellow", cex = 2.25)

par(op)

2.11.3 Distribution of ROC AUC in latent Variables

par(op)
par(mfrow=c(1,1),cex=0.7)

colors2 <- length(AUCResults)
colors2[1] <- colors[1];
colors2[2*(1:(length(colors)-1))] <- colors[2:length(colors)]
colors2[1+2*(1:(length(colors)-1))] <- colors[2:length(colors)]
vioplot(AUCResults,
        ylim=c(0.3,1.0),
        ylab="ROC AUC",
        main="ROC AUC of Latent Variables",
        col=colors2,
        cex.axis=0.6,
        las=2
)
abline(h=0.5,col="black")
stripchart(AUCResults, method = "jitter", col = "gray",
           vertical = TRUE, pch = 1, add = TRUE,cex=0.35)
points(1:length(AUCResults),lapply(AUCResults,mean), pch = 18, col = "yellow", cex = 2.25)

par(op)

2.11.4 Differences between train and test Balanced Accuracy

par(op)
par(mfrow=c(1,1),cex=0.7)
BACCTHR <- aucTHR
BACCResults <- list();
diffBACC <- list();
thenames <- rownames(univar$orderframe)[(rownames(univar$orderframe) %in% colnames(testDataFrame))]
rawBACC <- univar$orderframe[thenames,"BACC"]
thenames <- thenames[rawBACC >= BACCTHR]
rawBACC <- univar$orderframe[thenames,"BACC"]
rawBACCTest <- univarTest$orderframe[thenames,"BACC"]
BACCResults$RAW <- rawBACCTest
diffBACC$RAW <-  rawBACCTest-rawBACC
plot(rawBACC,rawBACCTest-rawBACC,
     xlab="TRAIN Balanced Acc",
     ylab="Test:BACC-Train:BACC",
     xlim=c(0.5,1.0),
     ylim=c(-0.25,0.25),
     pch=1,
     col=colors[1],
     main="Balanced Acc Difference Between Test and Train")

thenames <- rownames(univarDe$orderframe)[!(rownames(univarDe$orderframe) %in% colnames(testDataFrame))]
HCAP <- univarDe$orderframe[thenames,"BACC"]
thenames <- thenames[HCAP >= BACCTHR]
HCAP <- univarDe$orderframe[thenames,"BACC"]
HCAPTest <- univarDeTest$orderframe[thenames,"BACC"]
BACCResults$HCAP <- HCAP
BACCResults$HCAP_T <- HCAPTest
diffBACC$HCAP <-  HCAPTest-HCAP

points(HCAP,HCAPTest-HCAP,pch=2,col=colors[2])

thenames <- rownames(univarDeSpear$orderframe)[!(rownames(univarDeSpear$orderframe) %in% colnames(testDataFrame))]
HCAS <- univarDeSpear$orderframe[thenames,"BACC"]
thenames <- thenames[HCAS >= BACCTHR]
HCAS <- univarDeSpear$orderframe[thenames,"BACC"]
HCASTest <- univarDeSpearTest$orderframe[thenames,"BACC"]
BACCResults$HCAS <- HCAS
BACCResults$HCAS_T <- HCASTest
diffBACC$HCAS <-  HCASTest-HCAS

points(HCAS,HCASTest-HCAS,pch=3,col=colors[3])

thenames <- rownames(univarDeDri$orderframe)[!(rownames(univarDeDri$orderframe) %in% colnames(testDataFrame))]
DHCAP <- univarDeDri$orderframe[thenames,"BACC"]
thenames <- thenames[DHCAP >= BACCTHR]
DHCAP <- univarDeDri$orderframe[thenames,"BACC"]
DHCAPTest <- univarDeDriTest$orderframe[thenames,"BACC"]
BACCResults$DHCAP <- DHCAP
BACCResults$DHCAP_T <- DHCAPTest
diffBACC$DHCAP <-  DHCAPTest-DHCAP

points(DHCAP,DHCAPTest-DHCAP,pch=4,col=colors[4])

thenames <- rownames(univarDeDriSpear$orderframe)[!(rownames(univarDeDriSpear$orderframe) %in% colnames(testDataFrame))]
DHCAS <- univarDeDriSpear$orderframe[thenames,"BACC"]
thenames <- thenames[DHCAS >= BACCTHR]
DHCAS <- univarDeDriSpear$orderframe[thenames,"BACC"]
DHCASTest <- univarDeDriSpearTest$orderframe[thenames,"BACC"]
BACCResults$DHCAS <- DHCAS
BACCResults$DHCAS_T <- DHCASTest
diffBACC$DHCAS <-  DHCASTest-DHCAS

points(DHCAS,DHCASTest-DHCAS,pch=5,col=colors[5])

thenames <- rownames(univarPCA$orderframe)[!(rownames(univarPCA$orderframe) %in% colnames(testDataFrame))]
PCA <- univarPCA$orderframe[thenames,"BACC"]
thenames <- thenames[PCA >= BACCTHR]
PCA <- univarPCA$orderframe[thenames,"BACC"]
PCATest <- univarPCATest$orderframe[thenames,"BACC"]
BACCResults$PCA <- PCA
BACCResults$PCA_T <- PCATest
diffBACC$PCA <-  PCATest-PCA

points(PCA,PCATest-PCA,pch=6,col=colors[6])

thenames <- rownames(univarEFA$orderframe)[!(rownames(univarEFA$orderframe) %in% colnames(testDataFrame))]
EFA <- univarEFA$orderframe[thenames,"BACC"]
thenames <- thenames[EFA >= BACCTHR]
EFA <- univarEFA$orderframe[thenames,"BACC"]

EFATest <- univarEFATest$orderframe[thenames,"BACC"]
BACCResults$EFA <- EFA
BACCResults$EFA_T <- EFATest
diffBACC$EFA <-  EFATest-EFA

points(EFA,EFATest-EFA,pch=7,col=colors[7])

thenames <- rownames(univarWhite$orderframe)[!(rownames(univarWhite$orderframe) %in% colnames(testDataFrame))]
WPCA <- univarWhite$orderframe[thenames,"BACC"]
thenames <- thenames[WPCA >= BACCTHR]
WPCA <- univarWhite$orderframe[thenames,"BACC"]
WPCATest <- univarWhiteTest$orderframe[thenames,"BACC"]
BACCResults$WPCA <- WPCA
BACCResults$WPCA_T <- WPCATest
diffBACC$WPCA <-  WPCATest-WPCA


points(WPCA,WPCATest-WPCA,pch=8,col=colors[8])


names=c("Raw","HCA:P","HCA:S","DHCA:P","DHCA:S","PCA","EFA","White:PCA")
pchs=c(1,2,3,4,5,6,7,8)

legend("bottomright",names,col=colors,pch=pchs,cex=0.50)

par(op)

2.11.5 Violin of differences


par(op)
par(mfrow=c(1,1),cex=0.7)

vioplot(diffBACC,
        ylim=c(-0.25,0.25),
        ylab="Test-Train",
        main="Test-Train Paired Balanced Acc Difference",
        col=colors,
        cex.axis=0.6,
        las=2
)
stripchart(diffBACC, method = "jitter", col = "gray",
           vertical = TRUE, pch = 1, add = TRUE,cex=0.35)
points(1:length(diffBACC),lapply(diffBACC,mean), pch = 18, col = "yellow", cex = 2.25)

par(op)

2.11.6 Distribution of Balanced Acc in latent Variables

par(op)
par(mfrow=c(1,1),cex=0.7)

colors2 <- length(BACCResults)
colors2[1] <- colors[1];
colors2[2*(1:(length(colors)-1))] <- colors[2:length(colors)]
colors2[1+2*(1:(length(colors)-1))] <- colors[2:length(colors)]
vioplot(BACCResults,
        ylim=c(0.3,1.0),
        ylab="Balanced Acc",
        main="Balanced Acc of Latent Variables",
        col=colors2,
        cex.axis=0.6,
        las=2
)
abline(h=0.5,col="black")
stripchart(BACCResults, method = "jitter", col = "gray",
           vertical = TRUE, pch = 1, add = TRUE,cex=0.35)
points(1:length(BACCResults),lapply(BACCResults,mean), pch = 18, col = "yellow", cex = 2.25)

par(op)

2.11.7 Differences between train and test IDI

par(op)
par(mfrow=c(1,1),cex=0.7)

testIDI <- list();
diffIDI <- list();
rawIDI <- univar$orderframe$IDI
rawIDITest <- univarTest$orderframe$IDI
testIDI$RAW <- rawIDITest
diffIDI$RAW <-  rawIDITest-rawIDI
plot(rawIDI,rawIDITest-rawIDI,
     xlab="TRAIN Test IDI",
     ylab="Test:IDI-Train:IDI",
     xlim=c(0,0.5),
     ylim=c(-0.2,0.2),
     pch=1,
     col=colors[1],
     main="Predict IDI Difference Between Test and Train")

HCAP <- univarDe$orderframe$IDI[!(rownames(univarDe$orderframe) %in% colnames(testDataFrame))]
HCAPTest <-univarDeTest$orderframe$IDI[!(rownames(univarDe$orderframe) %in% colnames(testDataFrame))]
testIDI$HCAP <- HCAP
testIDI$HCAP_T <- HCAPTest
diffIDI$HCAP <-  HCAPTest-HCAP

points(HCAP,HCAPTest-HCAP,pch=2,col=colors[2])

HCAS <- univarDeSpear$orderframe$IDI[!(rownames(univarDeSpearTest$orderframe) %in% colnames(testDataFrame))]
HCASTest <- univarDeSpearTest$orderframe$IDI[!(rownames(univarDeSpearTest$orderframe) %in% colnames(testDataFrame))]
testIDI$HCAS <- HCAS
testIDI$HCAS_T <- HCASTest
diffIDI$HCAS <-  HCASTest-HCAS

points(HCAS,HCASTest-HCAS,pch=3,col=colors[3])

DHCAP <- univarDeDri$orderframe$IDI[!(rownames(univarDeDriTest$orderframe) %in% colnames(testDataFrame))]
DHCAPTest <- univarDeDriTest$orderframe$IDI[!(rownames(univarDeDriTest$orderframe) %in% colnames(testDataFrame))]
testIDI$DHCAP <- DHCAP
testIDI$DHCAP_T <- DHCAPTest
diffIDI$DHCAP <-  DHCAPTest-DHCAP

points(DHCAP,DHCAPTest-DHCAP,pch=4,col=colors[4])

DHCAS <- univarDeDriSpear$orderframe$IDI[!(rownames(univarDeDriSpearTest$orderframe) %in% colnames(testDataFrame))]
DHCASTest <- univarDeDriSpearTest$orderframe$IDI[!(rownames(univarDeDriSpearTest$orderframe) %in% colnames(testDataFrame))]
testIDI$DHCAS <- DHCAS
testIDI$DHCAS_T <- DHCASTest
diffIDI$DHCAS <-  DHCASTest-DHCAS

points(DHCAS,DHCASTest-DHCAS,pch=5,col=colors[5])

PCA <- univarPCA$orderframe$IDI[!(rownames(univarPCA$orderframe) %in% colnames(testDataFrame))]
PCATest <- univarPCATest$orderframe$IDI[!(rownames(univarPCA$orderframe) %in% colnames(testDataFrame))]
testIDI$PCA <- PCA
testIDI$PCA_T <- PCATest
diffIDI$PCA <-  PCATest-PCA

points(PCA,PCATest-PCA,pch=6,col=colors[6])

EFA <- univarEFA$orderframe$IDI[!(rownames(univarEFA$orderframe) %in% colnames(testDataFrame))]
EFATest <- univarEFATest$orderframe$IDI[!(rownames(univarEFA$orderframe) %in% colnames(testDataFrame))]
testIDI$EFA <- EFA
testIDI$EFA_T <- EFATest
diffIDI$EFA <-  EFATest-EFA

points(EFA,EFATest-EFA,pch=7,col=colors[7])

WPCA <- univarWhite$orderframe$IDI[!(rownames(univarWhite$orderframe) %in% colnames(testDataFrame))]
WPCATest <- univarWhiteTest$orderframe$IDI[!(rownames(univarWhite$orderframe) %in% colnames(testDataFrame))]
testIDI$WPCA <- WPCA
testIDI$WPCA_T <- WPCATest
diffIDI$WPCA <-  WPCATest-WPCA


points(WPCA,WPCATest-WPCA,pch=8,col=colors[8])


names=c("Raw","HCA:P","HCA:S","DHCA:P","DHCA:S","PCA","EFA","White:PCA")
pchs=c(1,2,3,4,5,6,7,8)

legend("bottomright",names,col=colors,pch=pchs,cex=0.50)

par(op)

2.11.8 Violin of differences


par(op)
par(mfrow=c(1,1),cex=0.7)

vioplot(diffIDI,
        ylim=c(-0.2,0.2),
        ylab="Test-Train",
        main="Test-Train Paired Predict IDI Difference",
        col=colors,
        cex.axis=0.6,
        las=2
)
stripchart(diffIDI, method = "jitter", col = "gray",
           vertical = TRUE, pch = 1, add = TRUE,cex=0.35)
points(1:length(diffIDI),lapply(diffIDI,mean), pch = 18, col = "yellow", cex = 2.25)

par(op)

2.11.9 Distribution of Predict IDI in latent Variables

par(op)
par(mfrow=c(1,1),cex=0.7)

colors2 <- length(testIDI)
colors2[1] <- colors[1];
colors2[2*(1:(length(colors)-1))] <- colors[2:length(colors)]
colors2[1+2*(1:(length(colors)-1))] <- colors[2:length(colors)]
vioplot(testIDI,
        ylim=c(0.0,0.5),
        ylab="Predict IDI",
        main="Predict IDI of Latent Variables",
        col=colors2,
        cex.axis=0.6,
        las=2
)
stripchart(testIDI, method = "jitter", col = "gray",
           vertical = TRUE, pch = 1, add = TRUE,cex=0.35)
points(1:length(testIDI),lapply(testIDI,mean), pch = 18, col = "yellow", cex = 2.25)

par(op)

2.11.10 The tables


pander::pander(univarTest$orderframe[1:TopVariables,univariate_columns])
  caseMean caseStd controlMean controlStd controlKSP IDI ROCAUC cStatCorr
ADAS13 20.942 6.12688 14.011 5.53542 0.297 0.243 0.781 0.794
RAVLT_immediate 28.621 7.92670 37.760 10.17622 0.459 0.144 0.689 0.765
ADAS11 12.972 4.60884 8.675 3.67173 0.105 0.196 0.755 0.764
M_ST40CV 0.179 0.00881 0.187 0.00735 0.929 0.178 0.739 0.758
M_ST29SV 0.125 0.00689 0.132 0.00734 0.837 0.173 0.736 0.751
pander::pander(univarDeTest$orderframe[1:TopVariables,univariate_columns])
  caseMean caseStd controlMean controlStd controlKSP IDI ROCAUC cStatCorr
ADAS11 12.97235 4.60884 8.674712 3.67173 1.05e-01 0.1961 0.755 0.764
Hippocampus 0.15807 0.00857 0.166161 0.00928 7.63e-01 0.1643 0.733 0.738
FAQ 5.10606 4.92890 1.951923 2.93201 5.70e-12 0.1883 0.781 0.731
La_M_ST24TA -0.00142 0.00291 0.000636 0.00278 2.98e-01 0.0684 0.617 0.703
M_ST31TA 0.01867 0.00189 0.019955 0.00166 5.36e-01 0.1171 0.704 0.699
pander::pander(univarDeSpearTest$orderframe[1:TopVariables,univariate_columns])
  caseMean caseStd controlMean controlStd controlKSP IDI ROCAUC cStatCorr
ADAS11 12.9723 4.60884 8.6747 3.67173 1.05e-01 0.1961 0.755 0.764
M_ST24TA 0.0259 0.00426 0.0296 0.00378 6.49e-02 0.1182 0.684 0.743
FAQ 5.1061 4.92890 1.9519 2.93201 5.70e-12 0.1883 0.781 0.731
M_ST44TA 0.0206 0.00299 0.0228 0.00313 7.31e-01 0.0551 0.605 0.702
M_ST31TA 0.0187 0.00189 0.0200 0.00166 5.36e-01 0.1171 0.704 0.699
pander::pander(univarDeDriTest$orderframe[1:TopVariables,univariate_columns])
  caseMean caseStd controlMean controlStd controlKSP IDI ROCAUC cStatCorr
ADAS13 20.9420 6.12688 14.011 5.53542 2.97e-01 0.243 0.781 0.794
M_ST29SV 0.1250 0.00689 0.132 0.00734 8.37e-01 0.173 0.736 0.751
FAQ 5.1061 4.92890 1.952 2.93201 5.70e-12 0.188 0.781 0.731
RAVLT_perc_forgetting 76.3100 29.68048 52.252 32.34899 5.27e-02 0.104 0.685 0.712
M_ST52TA 0.0178 0.00174 0.019 0.00160 5.12e-01 0.111 0.702 0.709
pander::pander(univarDeDriSpearTest$orderframe[1:TopVariables,univariate_columns])
  caseMean caseStd controlMean controlStd controlKSP IDI ROCAUC cStatCorr
ADAS13 20.9420 6.12688 14.011 5.53542 2.97e-01 0.243 0.781 0.794
M_ST29SV 0.1250 0.00689 0.132 0.00734 8.37e-01 0.173 0.736 0.751
FAQ 5.1061 4.92890 1.952 2.93201 5.70e-12 0.188 0.781 0.731
RAVLT_perc_forgetting 76.3100 29.68048 52.252 32.34899 5.27e-02 0.104 0.685 0.712
M_ST52TA 0.0178 0.00174 0.019 0.00160 5.12e-01 0.111 0.702 0.709
pander::pander(univarPCATest$orderframe[1:TopVariables,univariate_columns])
  caseMean caseStd controlMean controlStd controlKSP IDI ROCAUC cStatCorr
RC5 -0.494 0.94917 0.314 0.90156 3.39e-01 0.1224 0.698 0.733
FAQ 5.106 4.92890 1.952 2.93201 5.70e-12 0.1883 0.781 0.731
RC1 -0.409 1.03506 0.260 0.88605 3.24e-01 0.0937 0.676 0.690
MMSE 27.000 1.74303 28.019 1.81768 2.43e-07 0.0770 0.672 0.668
M_ST53SV 0.143 0.00567 0.146 0.00676 2.99e-01 0.0381 0.598 0.634
pander::pander(univarEFATest$orderframe[1:TopVariables,univariate_columns])
  caseMean caseStd controlMean controlStd controlKSP IDI ROCAUC cStatCorr
MR6 -0.497 0.92084 0.316 0.85117 3.66e-01 0.1269 0.695 0.742
FAQ 5.106 4.92890 1.952 2.93201 5.70e-12 0.1883 0.781 0.731
MR1 -0.397 1.02251 0.252 0.90303 6.26e-01 0.0819 0.665 0.683
MMSE 27.000 1.74303 28.019 1.81768 2.43e-07 0.0770 0.672 0.668
M_ST53SV 0.143 0.00567 0.146 0.00676 2.99e-01 0.0381 0.598 0.634
pander::pander(univarWhiteTest$orderframe[1:TopVariables,univariate_columns])
  caseMean caseStd controlMean controlStd controlKSP IDI ROCAUC cStatCorr
L1 2.151 0.86834 1.386 0.96559 1.70e-01 0.1172 0.702 0.738
FAQ 5.106 4.92890 1.952 2.93201 5.70e-12 0.1883 0.781 0.731
L2 -2.238 0.97290 -2.909 0.92908 9.52e-01 0.0932 0.679 0.694
MMSE 27.000 1.74303 28.019 1.81768 2.43e-07 0.0770 0.672 0.668
M_ST53SV 0.143 0.00567 0.146 0.00676 2.99e-01 0.0381 0.598 0.634

topUni <- univar$orderframe$Name[1:TopVariables]
topDe <- univarDe$orderframe$Name[1:TopVariables]
topDeSpear <- univarDeSpear$orderframe$Name[1:TopVariables]
topDeDri <- univarDeDri$orderframe$Name[1:TopVariables]
topDeDriSpear <- univarDeDriSpear$orderframe$Name[1:TopVariables]
topPCA <- univarPCA$orderframe$Name[1:TopVariables]
topEFA <- univarEFA$orderframe$Name[1:TopVariables]
topPCAWhite <- univarWhite$orderframe$Name[1:TopVariables]

2.11.11 Model of top variables

par(mfrow=c(1,2),cex=0.6)

lmRAW <- glm(paste(outcome,"~."),
             trainDataFrame[,c(outcome,topUni)],
             family="binomial")
prRaw <- predictionStats_binary(cbind(testDataFrame[,outcome],predict(lmRAW,testDataFrame)),"Top Raw",cex=0.75)

Top Raw


lmDe <- glm(paste(outcome,"~."),
            DEdataframe[,c(outcome,topDe)],
            family="binomial")
prDe <- predictionStats_binary(cbind(predTestDe[,outcome],predict(lmDe,predTestDe)),"Top HCA:P",cex=0.75)

Top HCA:P


lmDeSpear <- glm(paste(outcome,"~."),
            DEdataframeSpear[,c(outcome,topDeSpear)],
            family="binomial")
prSpear <- predictionStats_binary(cbind(predTestDeSpear[,outcome],predict(lmDeSpear,predTestDeSpear)),"Top HCA:S",cex=0.75)

Top HCA:S


lmDri <- glm(paste(outcome,"~."),
            DriDEdataframe[,c(outcome,topDeDri)],
            family="binomial")
prDri <- predictionStats_binary(cbind(predTestDe[,outcome],predict(lmDri,predTestDri)),"Top DHCA:P",cex=0.75)

Top DHCA:P


lmDriSpear <- glm(paste(outcome,"~."),
            DriDEdataframeSpear[,c(outcome,topDeDriSpear)],
            family="binomial")
prDriSpear <- predictionStats_binary(cbind(predTestDriSpear[,outcome],predict(lmDriSpear,predTestDriSpear)),"Top DHCA:S",cex=0.7)

Top DHCA:S



lmPCA <- glm(paste(outcome,"~."),
            PCA_Train[,c(outcome,topPCA)],
            family="binomial")
prPCA <- predictionStats_binary(cbind(PCA_Predicted[,outcome],predict(lmPCA,PCA_Predicted)),"Top PCA",cex=0.75)

Top PCA



lmEFA <- glm(paste(outcome,"~."),
            EFA_Train[,c(outcome,topEFA)],
            family="binomial")
prEFA <- predictionStats_binary(cbind(EFA_Predicted[,outcome],predict(lmEFA,EFA_Predicted)),"Top EFA",cex=0.75)

Top EFA



lmPCAW <- glm(paste(outcome,"~."),
            PCAWhite_Train[,c(outcome,topPCAWhite)],
            family="binomial")
prWPCA <- predictionStats_binary(cbind(PCAWhitePredicted[,outcome],predict(lmPCAW,PCAWhitePredicted)),"Top White:PCA",cex=0.75)

Top White:PCA

par(op)

2.11.12 The Performance Tables and Plots


par(cex=0.6)

 aucs <- prRaw$aucs
  aucs <- rbind(aucs,prDe$aucs)
  aucs <- rbind(aucs,prSpear$aucs)
  aucs <- rbind(aucs,prDri$aucs)
  aucs <- rbind(aucs,prDriSpear$aucs)
  aucs <- rbind(aucs,prPCA$aucs)
  aucs <- rbind(aucs,prEFA$aucs)
  aucs <- rbind(aucs,prWPCA$aucs)

  
  rownames(aucs) <- c("RAW",
                        "HCA:P",
                        "HCA:S",
                        "DHCA:P",
                        "DHCA:S",
                        "PCA",
                        "EFA",
                        "WPCA"
                        )
  
  pander::pander(aucs)
  est lower upper
RAW 0.816 0.769 0.863
HCA:P 0.841 0.798 0.885
HCA:S 0.828 0.782 0.874
DHCA:P 0.844 0.800 0.887
DHCA:S 0.844 0.800 0.887
PCA 0.815 0.767 0.862
EFA 0.803 0.754 0.852
WPCA 0.816 0.769 0.863
  
  bpAUC <- barPlotCiError(as.matrix(aucs),
                          metricname = "ROC AUC",
                          thesets = "Test AUC",
                          themethod = rownames(aucs),
                          main = "ROC AUC",
                          offsets = c(0.5,1),
                          scoreDirection = ">",
                          ho=0.5,
                          args.legend = list(bg = "white",x="bottomleft",inset=c(0.0,0),cex=0.5),
                          col = terrain.colors(nrow(aucs))
                          )


  
 berror <- prRaw$berror
  berror <- rbind(berror,prDe$berror)
  berror <- rbind(berror,prSpear$berror)
  berror <- rbind(berror,prDri$berror)
  berror <- rbind(berror,prDriSpear$berror)
  berror <- rbind(berror,prPCA$berror)
  berror <- rbind(berror,prEFA$berror)
  berror <- rbind(berror,prWPCA$berror)


  rownames(berror) <- rownames(aucs)
  pander::pander(berror)
  50% 2.5% 97.5%
RAW 0.270 0.215 0.321
HCA:P 0.235 0.184 0.282
HCA:S 0.259 0.208 0.306
DHCA:P 0.241 0.187 0.293
DHCA:S 0.241 0.195 0.289
PCA 0.262 0.212 0.314
EFA 0.306 0.254 0.360
WPCA 0.274 0.224 0.328

  bpBER <- barPlotCiError(as.matrix(berror),
                          metricname = "Balanced Error Rate",
                          thesets = "Test BER",
                          themethod = rownames(aucs),
                          main = "Balanced Error Rate",
                          offsets = c(0.5,1),
                          scoreDirection = "<",
                          ho=0.5,
                          args.legend = list(bg = "white",x="topleft",inset=c(0.0,0),cex=0.5),
                          col = terrain.colors(nrow(aucs))
                          )

  par(op)